"""Pipeline trigger for GitLab repositories."""

import argparse
import base64
import copy
import json
import logging
import os
import re
import sys

from cki_lib import cki_pipeline
from cki_lib import footer
from cki_lib import misc
from cki_lib import yaml
from cki_lib.logger import get_logger
from cki_lib.session import get_session
import dateutil.parser
import gitlab
import sentry_sdk

# Module-level logger and shared HTTP session used for all GitLab API calls.
LOGGER = get_logger('cki.cki_tools.gitlab_ci_bot')
SESSION = get_session('cki.cki_tools.gitlab_ci_bot')

# There are potentially three projects involved during the processing of a
# pipeline for a GitLab merge request:
# - pipeline_{gitlab,project}: pipeline repository
# - target_{gitlab,project}: pipeline-definition repository of CKI
# - target_gitlab, source_project: pipeline-definition fork for merge request

# Markdown template for the bot's one-time welcome comment on a merge request.
# The placeholders (groups, branches, overrides, welcome_message, footer) are
# filled in by create_welcome_note() via str.format().
WELCOME_MESSAGE = """Hi! This is the friendly CKI test bot.

Everybody with developer access can mention me in a top-level comment together
with `test [PIPELINES]` and I will test this merge request. Once the testing is
done, I will post the results here.

<details>
<summary>Click here for details on how to select the tests to run.</summary>

The testing is done by retriggering formerly successful pipelines with the new
code. Pipelines are specified by [GROUP/BRANCH] or [GROUP/PIPELINE-ID].

The following pipeline groups are configured:

{groups}

Within these groups, the following branches are configured:

{branches}

When triggering a pipeline for a branch, the last successful pipeline is
retriggered.

The precise bot configuration can be found in
[cee/deployment-all](https://gitlab.cee.redhat.com/cki-project/deployment-all/-/blob/main/openshift/gitlab-ci-bot/10-configmap.yml.j2.d/bot-config.yml.j2).

</details>

<details>
<summary>Click here for details on how to adjust the functionality of the
retriggered pipeline by replacing the trigger variables of the original
pipeline.</summary>

Variables can be replaced by specifying `[name=value]` in the `test` command:

* Only run the pipeline for x86_64 and s390x: `[architectures=x86_64 s390x]`
* Run pipeline for debug kernels for x86_64 and aarch64: `[debug_architectures=x86_64 aarch64]`
* Use the latest container images: `[image_tag=latest]`
* Run the tests in Beaker (please use this functionality responsibly and
  conserve the resources if the test runs are not needed):
  `[skip_beaker=false]`
* Force a full pipeline via `[tests_only=false]` or a short pipeline with only
  the setup and testing stages via `[tests_only=true]`.
* Code coverage analysis:
  * For RHEL, please use `[gcov/<branch>][skip_beaker=false]`
  * For CentOS, please use `[centos-gcov/<branch>][skip_beaker=false]`,
    with the appropriate branch (`c9s`, `c9s-auto`)
  * To limit coverage to certain kernel directories add `[coverage_dirs=dirs1 dirs2]`.
* Limited test set: `[test_set=net]`
* Filter test names (bot uses raw comment so don't escape special characters):
  `[tests_regex=.*Networking ipsec.*]`
* Force an MR run with targeted testing: `[force_baseline=false]`
* If needed, a closing square bracket can be specified by \\u005D

The full list of available variables can be found in the
[CKI documentation](https://cki-project.org/l/trigger-variables).

</details>

<details>
<summary>Click here for details on how to test multiple merge requests across
different projects at the same time.</summary>

When testing a single merge request, a corresponding override variable is
automatically added to the trigger variables. To test multiple merge requests,
the corresponding override variables for the other merge requests must be added
manually:

{overrides}

</details>

For more details about the features, check out the
[bot documentation](https://cki-project.org/l/pipeline-bot).

{welcome_message}
{footer}
"""


def first_awardemoji(bot_login, comment):
    """Return the first award emoji on the comment authored by the bot.

    Returns None when the bot has not reacted to the comment yet.
    """
    for emoji in comment.awardemojis.list(iterator=True):
        if emoji.user['username'] == bot_login:
            return emoji
    return None


def all_done(pipelines):
    """Check whether all pipelines are done.

    This includes the GitLab states (all but pending/running) and the internal
    states from start_pipelines.
    """
    finished = {'success', 'failed', 'canceled', 'skipped',
                'not-found', 'internal-error', 'merge-conflict'}
    return not any(pipeline['status'] not in finished for pipeline in pipelines)


def get_status_note(bot_login, discussion):
    """Return the first bot-authored note in the discussion, or None.

    The first note (the user's request) is skipped; only follow-up notes are
    considered.
    """
    for raw_note in discussion.attributes['notes'][1:]:
        if raw_note['author']['username'] == bot_login:
            return discussion.notes.get(raw_note['id'])
    return None


def format_status_table(pipelines, welcome_note_url):
    """Return a markdown table with the pipeline status.

    The raw pipeline data is appended base64-encoded in a hidden HTML comment
    so a later run can recover it via parse_status_note().
    """
    encoded = base64.encodebytes(json.dumps(pipelines).encode('utf8')).decode('ascii')
    parts = ['| Group | Branch | ID | Status |\n| --- | --- | --- | --- |\n']
    parts.extend(format_status_table_row(pipeline) for pipeline in pipelines)
    parts.append(f'\n<small>See the [welcome message]({welcome_note_url}) for details '
                 'on how to run tests more selectively.</small>\n')
    parts.append(f'<!-- {encoded} -->')
    return ''.join(parts)


def format_status_table_row(pipeline):
    """Return one markdown table row describing a single pipeline."""
    icons = {
        'created': ':hourglass_flowing_sand:',
        'pending': ':hourglass_flowing_sand:',
        'running': ':hourglass_flowing_sand:',
        'canceled': ':grey_exclamation:',
        'success': ':heavy_check_mark:',
        'not-found': ':grey_question:',
    }
    label = pipeline.get('pipeline_label')
    branch = pipeline.get('pipeline_branch')
    identifier = pipeline.get('id')
    if 'web_url' in pipeline:
        # link the pipeline ID to its web page when a URL is known
        identifier = f'[{identifier}]({pipeline["web_url"]})'
    # anything not in the icon map (e.g. failed/internal-error) gets ":exclamation:"
    status = f'{icons.get(pipeline["status"], ":exclamation:")} {pipeline["status"]}'
    if pipeline.get('stage'):
        status += f' ([{pipeline["stage"]}]({pipeline["job"]}))'
    return f'| {label} | {branch} | {identifier} | {status} |\n'


def format_status_list(pipelines):
    """Return a plain list with the pipeline status."""
    return ''.join(format_status_list_item(pipeline) for pipeline in pipelines)


def format_status_list_item(pipeline):
    """Return one plain-text list row describing a single pipeline."""
    # prefer the clickable web URL; fall back to the bare pipeline ID
    identifier = pipeline.get('web_url') or pipeline.get('id')
    label = pipeline.get('pipeline_label')
    branch = pipeline.get('pipeline_branch')
    status = pipeline.get('status')
    return f'- {identifier} ({label}/{branch}): {status}\n'


def set_status_note(discussion, note, pipelines, welcome_note_url, force_notify=False):
    """Post the status of the pipelines in comment form.

    The comment contains a visible table and the raw data base64-encoded in a
    hidden HTML comment.

    If no note is passed, a new note is created at the end of the discussion.
    Notes that cause emails are first created with plain text, and then changed
    to the table.
    """
    body = format_status_table(pipelines, welcome_note_url)
    if note:
        note.body = body + footer.Footer().gitlab_footer('updated')
        note.save()
        if force_notify:
            # editing a note does not email anybody; create and immediately
            # delete a plain-text note to force a notification
            throwaway = discussion.notes.create(
                {'body': format_status_list(pipelines)})
            throwaway.delete()
    else:
        # creating a note sends an email, so create it plain-text first and
        # then replace the body with the markdown table
        note = discussion.notes.create(
            {'body': format_status_list(pipelines)})
        note.body = body + footer.Footer().gitlab_footer()
        note.save()


def parse_status_note(body):
    """Extract and decode the raw pipeline data hidden in a status note.

    Returns an empty dict when the note contains no hidden HTML comment.
    """
    match = re.search(r'(?<=<!-- ).*(?= -->)', body, flags=re.DOTALL)
    if match:
        return json.loads(base64.decodebytes(match.group(0).encode('utf8')))
    return {}


def format_short_names(short_names):
    """Return the pipeline group names concatenated as bracketed tags."""
    tags = [f'[{short_name}]' for short_name in short_names]
    return ''.join(tags)


def format_long_name(pipeline_config):
    """Return the plural-form pipeline group name, plus an optional description."""
    name = pipeline_config['name'] + 's'
    if 'description' in pipeline_config:
        return f'{name} ({pipeline_config["description"]})'
    return name


def format_groups(pipelines_config):
    """Return a markdown list of pipeline groups.

    One bullet per configured group, mapping the short `[group]` tag to the
    long name from format_long_name().
    """
    # iterate items() directly instead of keys() + per-key lookup
    return ''.join(f'* `[{label}]`: {format_long_name(group_config)}\n'
                   for label, group_config in pipelines_config.items())


def format_branch_list(pipelines_config):
    """Format a markdown list of pipeline branches.

    One bullet per `[group/branch]` combination from the configured
    default_branches of each group.
    """
    # iterate items() directly instead of keys() + per-key lookup
    return ''.join(f'* `[{label}/{branch}]`\n'
                   for label, group_config in pipelines_config.items()
                   for branch in group_config['default_branches'])


def format_overrides_list(projects_config):
    """Format a markdown list of all available overrides."""
    header = [
        '| Project | Variable          |\n',
        '|---------|-------------------|\n',
    ]
    # mr_iid=123 is only a placeholder; the rendered value documents the shape
    rows = sorted(
        f'| {name} | `[{key}={value}]` |\n'
        for name, project_config in projects_config.items()
        for key, value in build_override_variable(project_config, 123).items())
    return ''.join(header + rows)


def create_welcome_note(pipelines_config, project_config, projects_config, merge_request) -> int:
    """Create a note with the welcome comment and return its note id."""
    body = WELCOME_MESSAGE.format(
        groups=format_groups(pipelines_config),
        branches=format_branch_list(pipelines_config),
        overrides=format_overrides_list(projects_config),
        welcome_message=project_config.get('welcome_message', ''),
        footer=footer.Footer().gitlab_footer(),
    )
    return merge_request.notes.create({'body': body}).id


def update_pipeline(pipeline_config, pipeline):
    """Refresh a single pipeline dict in place with the status from GitLab.

    Does nothing for pipelines without an ID (e.g. ones that failed to start).
    """
    pipeline_id = pipeline['id']
    if not pipeline_id:
        return
    with gitlab.Gitlab(pipeline_config['gitlab_url'],
                       private_token=os.environ[pipeline_config['private_token']],
                       session=SESSION) as pipeline_gitlab:
        project = pipeline_gitlab.projects.get(pipeline['project'])
        gl_pipeline = project.pipelines.get(pipeline_id)
        pipeline['status'] = gl_pipeline.status
        gl_jobs = None
        if gl_pipeline.status in ('failed', 'running'):
            gl_jobs = gl_pipeline.jobs.list(
                scope=gl_pipeline.status, per_page=1, all=False)
        if gl_jobs:
            pipeline['job'] = gl_jobs[0].web_url
            pipeline['stage'] = gl_jobs[0].stage
        else:
            # only show job links for failed/running pipelines
            pipeline.pop('job', None)
            pipeline.pop('stage', None)


def update_pipelines(pipelines_config, pipelines):
    """Return a deep copy of the pipelines with refreshed GitLab status.

    Failures to refresh one pipeline are logged and do not stop the others.
    """
    refreshed = copy.deepcopy(pipelines)
    for pipeline in refreshed:
        with misc.only_log_exceptions():
            update_pipeline(pipelines_config[pipeline['pipeline_label']], pipeline)
    return refreshed


def parse_commands(body):
    """Extract and preprocess user commands from the comment note body.

    Supports:
    - [group/abc] -> results['branches'] = [('group', 'abc'), ...]
    - [key=value] -> results['variables'] = {'key': 'value'}
    """
    commands = {
        'branches': [],
        'variables': {},
    }
    for raw_command in re.findall(r'(?<=\[)([^]]+?)(?=\\?\])', body):
        command = raw_command.strip()
        if '=' in command:
            # variable assignment; \u005D is the escape for a literal "]"
            key, _, value = command.partition('=')
            commands['variables'][key.strip()] = value.strip().replace(r'\u005D', ']')
        elif '/' in command:
            group, _, branch = command.partition('/')
            commands['branches'].append((group.strip(), branch.strip()))
    return commands


def start_pipeline(pipeline_config, pipeline_branch, variable_overrides):
    """Trigger and return one pipeline, or return an error string.

    pipeline_branch is either a (virtual) branch name or a numeric pipeline
    ID.  Returns the retriggered pipeline object on success, or one of the
    strings 'not-found', 'not-configured' or 'internal-error'.
    """
    # pre-initialize so the exception handler below can always log it; the
    # original code raised UnboundLocalError in the handler when the failure
    # happened before the ID was assigned (e.g. a missing token env variable)
    original_pipeline_id = None
    try:
        token = os.environ[pipeline_config['private_token']]
        # copy the filter: virtual branches may extend it below, and the
        # shared config dict must not accumulate entries across calls
        variable_filter = dict(pipeline_config.get('variable_filter', {}))
        virtual_branches = pipeline_config.get('virtual_branches', {})
        with gitlab.Gitlab(pipeline_config['gitlab_url'],
                           private_token=token,
                           session=SESSION) as pipeline_gitlab:
            pipeline_project = pipeline_gitlab.projects.get(
                pipeline_config['project'])
            try:
                # a numeric "branch" is treated as an explicit pipeline ID
                original_pipeline_id = int(pipeline_branch)
                pipeline_project.pipelines.get(original_pipeline_id)
            except gitlab.exceptions.GitlabGetError:
                return 'not-found'
            except ValueError:
                # not numeric: resolve (virtual) branch names to the last
                # successful pipeline on that branch
                if pipeline_branch in virtual_branches:
                    virtual_branch = virtual_branches[pipeline_branch]
                    pipeline_branch = virtual_branch['branch']
                    variable_filter.update(virtual_branch.get('variable_filter', {}))
                elif pipeline_branch not in pipeline_config['default_branches']:
                    return 'not-configured'
                original_pipeline = cki_pipeline.last_successful_pipeline_for_branch(
                    pipeline_project, pipeline_branch,
                    variable_filter=variable_filter)
                if not original_pipeline:
                    return 'not-found'
                original_pipeline_id = original_pipeline.id
            return cki_pipeline.retrigger(
                pipeline_project,
                original_pipeline_id,
                variable_overrides=variable_overrides)
    except Exception:  # pylint: disable=broad-except
        LOGGER.exception("Failed to retrigger the pipeline: %r", original_pipeline_id)
        return 'internal-error'


def build_mr_ref(mr_iid, ref='merge'):
    """Return the symbolic Git reference for a merge request."""
    return 'refs/merge-requests/{}/{}'.format(mr_iid, ref)


def has_merge_conflict(target_gitlab, merge_request):
    """Check whether an MR has a merge conflict.

    Conservatively returns True when the merge ref cannot be resolved or when
    the head commit is not a parent of the merge commit (outdated merge ref).
    """
    target_project = target_gitlab.projects.get(merge_request.target_project_id)
    merge_ref = build_mr_ref(merge_request.iid)
    try:
        merge_commit = target_project.commits.get(merge_ref)
    except Exception:  # pylint: disable=broad-except
        LOGGER.exception('MR-under-test has no valid commit %s', merge_ref)
        return True
    head_commit = target_project.commits.get(build_mr_ref(merge_request.iid, ref='head'))
    if head_commit.id not in merge_commit.parent_ids:
        LOGGER.warning('MR-under-test has outdated head commit')
        return True
    return False


def build_web_url(gitlab_url, project):
    """Return the web URL for a GitLab project, no trailing slash."""
    base = gitlab_url.rstrip('/')
    path = project.rstrip('/')
    return base + '/' + path


def build_override_variable(project_config, mr_iid, target_gitlab=None):
    """Build the variable override variable for the retriggered pipeline.

    Returns a single-entry dict mapping the configured override variable to
    its value, or an empty dict when no override is configured or the type is
    unknown.
    """
    override_type = misc.get_nested_key(project_config, 'override/type')
    if override_type is None:
        return {}

    override_name = misc.get_nested_key(project_config, 'override/name')
    ref_name = build_mr_ref(mr_iid)

    if override_type == 'pipeline-definition':
        return {'pipeline_definition_branch_override': ref_name}
    if override_type == 'image_tag':
        return {'image_tag': f'mr-{mr_iid}'}
    if override_type == 'pip_url':
        project_url = build_web_url(project_config["gitlab_url"], project_config["project"])
        return {override_name: f'git+{project_url}.git/@{ref_name}'}
    if override_type == 'archive_url':
        # A Restraint pull URL is special:
        # - it is used in slightly mangled form as a file system path, and
        #   should not contain anything that could be confused with a test path
        #   (https://github.com/restraint-harness/restraint/issues/272)
        # - it is fixed to a commit SHA as the test archive is only downloaded
        #   at test runtime; using a symbolic ref would be confusing if the MR
        #   sees more pushes in the meantime
        with misc.only_log_exceptions():
            if target_gitlab:
                api_url = target_gitlab.api_url
                target_project = target_gitlab.projects.get(project_config["project"])
                target_project_id = target_project.id
                ref_sha = target_project.commits.get(ref_name).id
            else:
                # documentation placeholders (see format_overrides_list)
                api_url = build_web_url(project_config["gitlab_url"], 'api/v4')
                target_project_id = 123456
                ref_sha = '123456789abcdef'
            return {override_name: f'{api_url}/projects/' +
                    f'{target_project_id}/repository/archive.tar.gz?sha={ref_sha}'}

    LOGGER.error('Unknown override type %s', override_type)
    return {}


def start_pipelines(pipelines_config, project_config, target_gitlab,
                    merge_request, note):
    """Trigger the appropriate pipelines.

    Parses the commands from the note body and retriggers one pipeline per
    requested (group, branch) pair.  Returns a list of result dicts with the
    pipeline label/branch/ID/status for each request.
    """
    commands = parse_commands(note.body)
    results = []
    for pipeline_label, pipeline_branch in sorted(commands['branches']):
        try:
            pipeline_config = pipelines_config[pipeline_label]
        except KeyError:
            # unknown group names are silently ignored
            continue
        # use the module logger instead of a bare print() for consistency
        LOGGER.info('processing %s of %s', pipeline_branch, pipeline_label)
        result = {'pipeline_branch': pipeline_branch,
                  'id': '',
                  'status': 'internal-error',
                  'pipeline_label': pipeline_label,
                  'project': pipeline_config['project']}
        results.append(result)

        if has_merge_conflict(target_gitlab, merge_request):
            result['status'] = 'merge-conflict'
            continue

        # later updates win: pipeline config < project config < user commands
        variable_overrides = build_override_variable(project_config, merge_request.iid,
                                                     target_gitlab=target_gitlab)
        variable_overrides.update(pipeline_config.get('variables', {}))
        variable_overrides.update(project_config.get('variables', {}))
        variable_overrides.update(commands['variables'])
        pipeline = start_pipeline(pipeline_config, pipeline_branch, variable_overrides)
        if isinstance(pipeline, str):
            # start_pipeline returned an error string instead of a pipeline
            result['status'] = pipeline
        else:
            result['id'] = pipeline.id
            result['status'] = pipeline.status
            result['web_url'] = pipeline.web_url
    return results


def is_developer(project, user_id):
    """Return whether a user is at least developer of a project.

    Always returns a bool; the original returned the empty member list when
    the user was not a member at all.
    """
    matching_members = project.members_all.list(per_page=1, user_ids=[user_id])
    # 30 is the GitLab access level for the "Developer" role
    return bool(matching_members) and matching_members[0].access_level >= 30


def was_force_pushed(target_gitlab, merge_request, since):
    """Check if the MR was updated by a force push after the specified time."""
    source_project = target_gitlab.projects.get(merge_request.source_project_id)
    since_datetime = dateutil.parser.parse(since)

    for event in source_project.events.list(iterator=True):
        LOGGER.debug('  checking event from %s', event.created_at)
        if dateutil.parser.parse(event.created_at) < since_datetime:
            # events are returned newest-first, so everything from here on
            # predates the request
            break

        # This catches adding more commits too but we want to complain about
        # that as well so it should be ok
        is_force_push = (event.action_name == 'pushed_to'
                         and event.push_data['commit_from'] is not None
                         and event.push_data['ref'] == merge_request.source_branch)
        if is_force_push:
            return True

    return False


def was_committed(merge_request, since):
    """Check if the MR has any new commits since given datetime."""
    threshold = dateutil.parser.parse(since)
    for commit in merge_request.commits():
        LOGGER.debug('  checking commit from %s', commit.created_at)
        if dateutil.parser.parse(commit.created_at) > threshold:
            return True
    return False


def note_awardemoji(note, name, previous_awardemoji=None):
    """Add an emoji to a note, replacing the previous one if given.

    The emojis are used to keep state for a note:
    - thumbsup: pipelines have been triggered, and the pipeline status is
      updated in a follow-up note
    - checkered_flag: final results from all pipelines are available
    - thumbsdown: the testing request in the note is invalid
    """
    if previous_awardemoji:
        previous_awardemoji.delete()
    payload = {'name': name}
    note.awardemojis.create(payload)


def process_merge_request(pipelines_config, project_config, projects_config,
                          target_project, merge_request):
    """Process the bot interactions in a merge request.

    Ensures a bot welcome note exists, then processes every non-system
    discussion for test commands.
    """
    # allow integration tests to restrict processing to a single MR
    inttest_id = os.getenv('IT_GITLAB_MERGE_REQUEST')
    if inttest_id and inttest_id != str(merge_request.iid):
        return
    # use the module logger instead of the root logger for consistency with
    # the rest of the file
    LOGGER.info('Checking MR #%s from %s',
                merge_request.attributes['iid'],
                project_config['project'])
    discussions = merge_request.discussions.list(get_all=True)
    bot_login = target_project.manager.gitlab.user.username
    # the welcome note is the first bot-authored, non-system top-level note
    welcome_note_id = next((
        misc.get_nested_key(d.attributes, 'notes/0/id')
        for d in discussions
        if not misc.get_nested_key(d.attributes, 'notes/0/system') and
        misc.get_nested_key(d.attributes, 'notes/0/author/username') == bot_login
    ), None)
    if not welcome_note_id:
        welcome_note_id = create_welcome_note(
            pipelines_config, project_config, projects_config, merge_request)
    welcome_note_url = f'{merge_request.web_url}#note_{welcome_note_id}'
    for discussion in discussions:
        with misc.only_log_exceptions():
            # skip system discussions (commit pushes, label changes, ...)
            if not discussion.attributes['notes'][0]['system']:
                process_discussion(
                    pipelines_config, project_config, target_project.manager.gitlab,
                    target_project, merge_request, discussion, welcome_note_url)


# pylint: disable=too-many-arguments,too-many-positional-arguments
def process_discussion(pipelines_config, project_config, target_gitlab,
                       target_project, merge_request, discussion, welcome_note_url):
    """Process a single bot interaction in a merge request.

    A discussion is handled when its first note mentions the bot together
    with the word `test`.  The bot's award emoji on that note tracks state:
    no emoji means the request has not been handled yet; `thumbsup` means
    pipelines are running and the status note is kept up to date;
    `checkered_flag` means processing is finished; `thumbsdown` means the
    request was rejected.
    """
    bot_login = target_gitlab.user.username
    first_comment = discussion.attributes['notes'][0]
    body = first_comment['body']
    if f'@{bot_login}' in body and 'test' in body:
        note = merge_request.notes.get(first_comment['id'])
        awardemoji = first_awardemoji(bot_login, note)
        if awardemoji:
            # the request was already handled; only a 'thumbsup' state needs
            # further status updates
            if awardemoji.name == 'thumbsup':
                status_note = get_status_note(bot_login, discussion)
                if status_note:
                    pipelines = parse_status_note(status_note.body)
                    updated = update_pipelines(pipelines_config, pipelines)
                    if all_done(updated):
                        # final results: flip the emoji and notify via email
                        note_awardemoji(note, 'checkered_flag', awardemoji)
                        set_status_note(discussion, status_note,
                                        updated, welcome_note_url, force_notify=True)
                    else:
                        set_status_note(discussion, status_note, updated, welcome_note_url)
                else:
                    # cannot check pipelines without a status note, so stop
                    note_awardemoji(note, 'checkered_flag', awardemoji)
            return

        # new request: verify the author has at least developer access
        if not is_developer(target_project, note.author['id']):
            note_awardemoji(note, 'thumbsdown')
            discussion.notes.create({
                'body':
                f'Hi {note.author["username"]}! You don\'t have '
                'permissions to trigger testing. Please wait for a '
                'developer to review your MR.' + footer.Footer().gitlab_footer(),
            })
            return

        # reject requests when the MR changed after the request was made
        LOGGER.debug('Checking force-push or commit after %s', note.created_at)
        if (was_force_pushed(target_gitlab, merge_request, note.created_at)
                or was_committed(merge_request, note.created_at)):
            note_awardemoji(note, 'thumbsdown')
            discussion.notes.create({
                'body':
                'Hi! The MR code has been modified since testing was '
                'requested. Please review the new changes before asking '
                'again to test.' + footer.Footer().gitlab_footer(),
            })
            return

        # If we got here it means pipelines should be triggered
        pipelines = start_pipelines(pipelines_config, project_config,
                                    target_gitlab, merge_request, note)

        if not pipelines:
            # no (group, branch) pair in the request matched the configuration
            note_awardemoji(note, 'thumbsdown')
            discussion.notes.create({
                'body':
                'Hi! I could not find pipelines matching your request. '
                'Please try again :hugging:' + footer.Footer().gitlab_footer(),
            })
            return

        # publish the initial status table and mark the request as running
        status_note = get_status_note(bot_login, discussion)
        set_status_note(discussion, status_note,
                        pipelines, welcome_note_url, force_notify=True)
        note_awardemoji(note, 'thumbsup')


def main(args):
    """Process the bot interactions in all open merge requests."""
    parser = argparse.ArgumentParser(description='Pipeline trigger for GitLab repositories')
    parser.add_argument('--config-path',
                        default=os.environ.get('GITLAB_CI_BOT_CONFIG_PATH', 'config.yml'),
                        help='Path to the config file')
    parsed_args = parser.parse_args(args)

    # the config can come from the environment or from a file on disk
    module_config = yaml.load(contents=os.environ.get('GITLAB_CI_BOT_CONFIG'),
                              file_path=parsed_args.config_path)
    pipelines_config = module_config['pipelines']
    projects_config = module_config['projects']

    for project_config in projects_config.values():
        with gitlab.Gitlab(project_config['gitlab_url'],
                           private_token=os.environ[project_config['private_token']],
                           session=SESSION) as target_gitlab:
            # resolve the authenticated bot user so its username is available
            target_gitlab.auth()
            target_project = target_gitlab.projects.get(project_config['project'])
            open_mrs = target_project.mergerequests.list(state='opened', iterator=True)
            for merge_request in open_mrs:
                # one broken MR must not stop processing of the others
                with misc.only_log_exceptions():
                    process_merge_request(pipelines_config, project_config,
                                          projects_config, target_project,
                                          merge_request)


if __name__ == '__main__':
    # initialize Sentry error reporting before processing all projects
    misc.sentry_init(sentry_sdk)
    main(sys.argv[1:])
